"""
Changelog parser
================

After running `github_changelog_generator kivy/kivy --token xxx --no-issues \
--no-author --since-tag "1.10.1" --max-issues 1000`, it generates a changelog
file with all the PRs that were merged since the listed release. `max-issues`
needs to be larger than the number of PRs and issues that were closed
(opened?) since the listed release. Otherwise, the script tries to fetch
everything, GitHub rate-limits it and blocks it for an hour, and the changelog
generation fails.

Next, clean the file manually so that it only contains the list of PRs: each
line is a PR, and everything else must be removed.

Each line should look like the following (nothing else can be in the file):

    - \\[packaging\\]Fixes for PPA and CI [\\#6306]\
    (https://github.com/kivy/kivy/pull/6306)

Every PR should start with a label indicating the section it is in (i.e.
`[...]`). Do this before generating the log by updating the title of each
pull request on github. E.g. the PR above would look like:

    [packaging]Fixes for PPA and CI

on github. In addition to the first tag, there can be a second tag with the
value `[highlight]`, which puts the PR in the separate Highlights section.
Then generate a final changelog by running:

    python changelog_parser.py input output

where input is the file generated by `github_changelog_generator` and output
is the name of the desired file.
"""
from os.path import exists
import re
from collections import defaultdict
from functools import partial
import sys

# Only `process_changelog` is exported; the underscore-prefixed functions in
# this module are internal helpers.
__all__ = ('process_changelog', )


def _remove_escaping(value):
    return value.replace(
        r'\[', '[').replace(r'\]', ']').replace(r'\#', '#').replace(
        r'\(', '(').replace(r'\)', ')').replace(r'\_', '_').replace(r'\>', '>')


def _format_pr_list(items, indent=0):
    return '\n'.join('{}{}'.format(' ' * indent, item) for item in items)


def process_changelog(filename_in, filename_out):
    """Convert a cleaned-up list of PRs into a sectioned changelog file.

    The PR lines of ``filename_in`` are parsed by :func:`_get_pulls` and the
    result is written to ``filename_out`` in this order: a "Highlights"
    section, a "Deprecated" section, one section per label sorted by title,
    and finally an "Unknown" section holding the lines that
    :func:`_get_pulls` reported as unknown. Empty sections are omitted.

    :param filename_in: path of the input file, one PR per line.
    :param filename_out: path of the changelog file to create.
    :raises ValueError: if ``filename_out`` already exists.
    """
    if exists(filename_out):
        raise ValueError(
            '{} already exists and would be overwritten'.format(filename_out))

    highlight, deprecate, items, nested_items, unknown = _get_pulls(
        filename_in)

    with open(filename_out, 'w') as fh:
        if highlight:
            fh.write('Highlights\n==========\n\n')
            fh.write(_format_pr_list(highlight, 4))
            fh.write('\n\n')

        if deprecate:
            fh.write('Deprecated\n==========\n\n')
            fh.write(_format_pr_list(deprecate, 4))
            fh.write('\n\n')

        # A title can be used both on its own (e.g. ``[core]``) and as the
        # parent of nested labels (e.g. ``[core/window]``). Walk the union of
        # both mappings instead of merging one into the other, so that the
        # plain PRs of such a section are not overwritten and lost.
        for section in sorted(set(items) | set(nested_items)):
            fh.write('{}\n{}\n\n'.format(section, '-' * len(section)))

            # Membership tests and .get() are used so that no empty entries
            # are created if the mappings are defaultdicts.
            if section in items:
                fh.write(_format_pr_list(items[section], 4))
                fh.write('\n\n')

            subsections = nested_items.get(section, {})
            for subsection in sorted(subsections):
                fh.write('{}{}\n\n'.format(' ' * 4, subsection))
                fh.write(_format_pr_list(subsections[subsection], 4))
                fh.write('\n\n')

        if unknown:
            fh.write('Unknown\n=======\n\n')
            fh.write(_format_pr_list(unknown, 4))
            fh.write('\n\n')


def _get_pulls(filename_in):
    """Parse the PR lines of ``filename_in`` and group them by label.

    Blank lines are ignored and every remaining line is stripped. A line
    that matches the expected PR format becomes an entry of the form
    ``- [#<number>]: <title>`` and is sorted as follows:

    * a PR carrying the second ``[highlight]`` tag goes to ``highlight``;
    * a PR labelled ``deprecate`` or ``deprecated`` goes to ``deprecate``;
    * a PR whose label contains a ``/`` goes to ``nested_items`` under the
      title-cased parent and child (only the first ``/`` separates them);
    * any other labelled PR goes to ``items`` under its title-cased label,
      where ``docs`` is folded into ``doc`` and ``ci`` is spelled ``CI``.

    Labels are compared case-insensitively. Lines that do not match the
    format, as well as PR lines without any label, are collected in
    ``unknown`` with their escaping removed.

    :param filename_in: path of the input file, one PR per line.
    :returns: the tuple ``(highlight, deprecate, items, nested_items,
        unknown)``. ``highlight``, ``deprecate`` and ``unknown`` are lists of
        strings, ``items`` maps a section title to a list of entries and
        ``nested_items`` maps a parent title to a mapping from child title
        to a list of entries.
    """
    with open(filename_in) as fh:
        stripped = (line.strip() for line in fh.read().splitlines())
        lines = [line for line in stripped if line]

    pat = re.compile(
        r'^- (?:\\\[(.+?)\\\])?(?:\\\[(highlight)\\\])?(.+?)\[\\#([0-9]+)\]'
        r'\(https://github.com/kivy/kivy/pull/[0-9]+\)$')

    # Different spellings of a label that are folded into one.
    aliases = {'docs': 'doc', 'deprecated': 'deprecate'}

    items = defaultdict(list)
    nested_items = defaultdict(partial(defaultdict, list))
    unknown = []
    highlight = []
    deprecate = []

    for line in lines:
        m = pat.match(line)

        if m is None:
            unknown.append(_remove_escaping(line))
            continue

        label, highlighted, title, num = m.groups()
        item = '- [#{}]: {}'.format(num, _remove_escaping(title).strip())

        if highlighted:
            highlight.append(item)
            continue

        if label is None:
            # The label group of the pattern is optional, so a PR line
            # without any label still matches. It has no section to go to;
            # report it with the other unclassified lines rather than
            # failing on ``None.lower()``.
            unknown.append(_remove_escaping(line))
            continue

        label = label.lower()
        label = aliases.get(label, label)

        if label == 'deprecate':
            deprecate.append(item)
            continue

        if '/' in label:
            # Split on the first slash only, so that a label with several
            # slashes cannot break the two-name unpacking.
            parent, _, child = label.partition('/')
            nested_items[parent.title()][child.title()].append(item)
        else:
            section = 'CI' if label == 'ci' else label.title()
            items[section].append(item)

    return highlight, deprecate, items, nested_items, unknown


if __name__ == '__main__':
    # Exactly two arguments are expected: the input and the output file name.
    # Show the usage for any other count instead of failing with the
    # argument-count TypeError of the star-unpacked call below.
    if len(sys.argv) != 3:
        sys.exit('usage: python changelog_parser.py input output')
    process_changelog(*sys.argv[1:])
